#!/usr/bin/python 
#coding=utf-8

import re
import time
import datetime

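# Task: given the log file below, print the unique IPs and count them (do this in both Python and shell), and detect abnormal IPs (more than 30 requests within 1 minute):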

# Path of the access log analysed when this file is run as a script.
LOG_PATH = "ipHttpData.ipHttpData"

# Captures two groups from a log line: the client IP (first token before
# " - - ") and the first whitespace-free token after "[", i.e. the timestamp
# such as 23/May/2006:08:58:08.
LOG_LINE_RE = re.compile(r"(\S+) - - \[(\S+)")

# strptime format of the captured timestamp.  %b (abbreviated month name) is
# required here: %B only accepts full month names, so it parses "May" but
# rejects "Jan", "Feb", ...
node = "%d/%b/%Y:%H:%M:%S"


def parse_access_log(lines):
    """Group the raw timestamp strings of an access log by client IP.

    Args:
        lines: any iterable of log lines (an open file works).

    Returns:
        A dict mapping each IP to the list of its timestamp strings, in the
        order the lines were read.  Lines that do not match LOG_LINE_RE
        (blank or malformed lines) are skipped.
    """
    hits_by_ip = {}
    for line in lines:
        match = LOG_LINE_RE.search(line)
        if match is None:
            # search() returns None for blank/garbage lines; calling
            # .groups() on it would abort the whole run.
            continue
        ip, stamp = match.groups()
        hits_by_ip.setdefault(ip, []).append(stamp)
    return hits_by_ip


def find_abnormal_ips(hits_by_ip, threshold=30, window_seconds=60):
    """Return the IPs with more than `threshold` hits inside the time window.

    An IP is abnormal when some run of `threshold + 1` of its hits spans
    strictly less than `window_seconds` seconds.

    Args:
        hits_by_ip: dict of IP -> list of timestamp strings in `node` format.
        threshold: number of hits that is still considered normal.
        window_seconds: length of the sliding window in seconds.

    Returns:
        A list of abnormal IPs, each listed once, in dict iteration order.

    Raises:
        ValueError: if a timestamp string does not match `node`.
    """
    abnormal = []
    for ip, stamps in hits_by_ip.items():
        if len(stamps) <= threshold:
            continue
        # Parse each timestamp once and sort, so the window check does not
        # depend on the log being strictly chronological.
        times = sorted(datetime.datetime.strptime(s, node) for s in stamps)
        # Pair every hit with the hit `threshold` positions later.
        # total_seconds() is used instead of .seconds because .seconds drops
        # the days component and would flag hits that are days apart.
        if any(
            (later - earlier).total_seconds() < window_seconds
            for earlier, later in zip(times, times[threshold:])
        ):
            abnormal.append(ip)
    return abnormal


def main(path=LOG_PATH):
    """Print the unique IPs of the log at `path`, their count, and every
    abnormal IP (see find_abnormal_ips)."""
    # The context manager closes the log even if parsing fails.
    with open(path) as log_file:
        hits_by_ip = parse_access_log(log_file)

    ips = list(hits_by_ip)
    # Single pre-formatted argument so the call prints the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print("独立IP包括: %s ，独立IP数一共有 %d 个\n" % (ips, len(ips)))

    for ip in find_abnormal_ips(hits_by_ip):
        print("异常IP包括: %s" % ip)


if __name__ == "__main__":
    main()

#timestr1="23/May/2006:08:58:08"
#timestr2="23/May/2006:08:58:18"
#a_ = datetime.datetime.strptime(timestr1,node)
#b_ = datetime.datetime.strptime(timestr2,node)
#c = b_ - a_
#print a_ 
#print b_  
#print c.seconds




